package pic;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;

/**
 * @author yun lang
 * @version 1.0
 * @description: Command-line utility that runs Tesseract OCR over every PNG image found
 *               in a fixed directory and prints the recognized text to standard output.
 * @date 2024/12/10 09:46
 */
public class PicTest {

    /** Directory containing the Tesseract trained-data files. */
    private static final String TESSDATA_PATH = "/usr/local/share/tessdata/";

    /** Tesseract language code used for recognition (Simplified Chinese). */
    private static final String OCR_LANGUAGE = "chi_sim";

    /** Directory that is scanned for PNG images to recognize. */
    private static final String IMAGE_DIRECTORY =
            "/Users/liangpei/Documents/pdf/（提取图片）中研报关单/";

    /**
     * Runs OCR on every {@code .png} file directly inside {@link #IMAGE_DIRECTORY}.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Build the configured engine once and reuse it for every image.
        ITesseract engine = createEngine();

        File imageDirectory = new File(IMAGE_DIRECTORY);
        // Regular files whose name ends with ".png" (case-insensitive); sub-directories are skipped.
        File[] imageFiles = imageDirectory.listFiles(
                (dir, name) -> name.toLowerCase().endsWith(".png") && new File(dir, name).isFile());
        if (imageFiles == null) {
            // listFiles returns null when the path is not a directory or cannot be read.
            System.err.println("Cannot list image directory: " + imageDirectory);
            return;
        }

        for (File file : imageFiles) {
            System.out.println("正在处理文件: " + file.getName());
            recognize(engine, file);
        }
    }

    /**
     * Runs OCR on a single image file and prints the recognized text to standard output.
     * Failures are reported on standard error and do not propagate to the caller.
     *
     * @param file    the image file to recognize
     * @param jarPath not used; kept only so existing callers keep compiling
     */
    public static void parsePic(File file, String jarPath) {
        // Use a configured engine: an unconfigured instance would ignore the
        // data path and language this tool is meant to run with.
        recognize(createEngine(), file);
    }

    /** Creates a Tesseract engine configured with this tool's data path and language. */
    private static ITesseract createEngine() {
        Tesseract engine = new Tesseract();
        engine.setDatapath(TESSDATA_PATH);
        engine.setLanguage(OCR_LANGUAGE);
        return engine;
    }

    /** Decodes {@code file} as an image, runs OCR with {@code engine}, and prints the text. */
    private static void recognize(ITesseract engine, File file) {
        try {
            BufferedImage image = ImageIO.read(file);
            if (image == null) {
                // ImageIO.read yields null when no registered reader can decode the file.
                System.err.println("Unsupported or corrupt image, skipped: " + file);
                return;
            }
            System.out.println(engine.doOCR(image));
        } catch (Exception e) {
            // Top-level boundary of a command-line tool: report and continue with the next file.
            System.err.println("OCR failed for: " + file);
            e.printStackTrace();
        }
    }
}
